clear
set more off, perm
cd /Users/zachbrown/Projects/PriceTransparency/Data/

///////////////////////////////////////////////////////////////////
// Import ZCTA income from 2007-2011 ACS 5-year Estimates Table DP03
///////////////////////////////////////////////////////////////////

insheet using zip_info/ACS_11_5YR_DP03/ACS_11_5YR_DP03_with_ann.csv, comma names clear

// Make variable labels
foreach x of varlist * {
   label var `x' `"`=`x'[1]'"' 
	}
drop in 1

// Keep only some vars
gen geoid_str = substr(geodisplaylabel,7,5)
keep geoid_str geoid2 hc01_vc85 hc01_vc86

foreach x of varlist geoid2 hc01_vc85 hc01_vc86 {
   destring `x', replace force
	}
desc

rename geoid2 zcta_crosswalk
rename hc01_vc85 zip_median_income
rename hc01_vc86 zip_mean_income

label var zip_median_income "Median zip code income (2011$)"
label var zip_mean_income "Mean zip code income (2011$)"

drop geoid_str
order zcta_crosswalk
save zip_info/ACS_11_5YR_DP03/ACS_11_5YR_DP03_with_ann.dta, replace



///////////////////////////////////////////////////////////////////
// Import ZCTA education from 2007-2011 ACS 5-year Estimates Table S1501
///////////////////////////////////////////////////////////////////

insheet using zip_info/ACS_11_5YR_S1501/ACS_11_5YR_S1501_with_ann.csv, comma names clear

// Make variable labels
foreach x of varlist * {
   label var `x' `"`=`x'[1]'"' 
	}
drop in 1

// Keep only some vars
gen geoid_str = substr(geodisplaylabel,7,5)
foreach x of varlist geoid2 hc01_est_vc01 hc01_est_vc07 hc01_est_vc16 hc01_est_vc17 {
   destring `x', replace force
	}


gen zip_pop = hc01_est_vc01 + hc01_est_vc07
keep geoid_str geoid2 zip_pop hc01_est_vc16 hc01_est_vc17 
desc

summ hc01_est_vc16 hc01_est_vc17 
rename geoid2 zcta_crosswalk
rename hc01_est_vc16 zip_pct_hs
rename hc01_est_vc17 zip_pct_ba

label var zip_pop "Zip code adult population"
label var zip_pct_hs "Zip code percent HS"
label var zip_pct_ba "Zip code percent BA"

drop geoid_str
order zcta_crosswalk zip_pop
save zip_info/ACS_11_5YR_S1501/ACS_11_5YR_S1501_with_ann.dta, replace


///////////////////////////////////////////////////////////////////
// Import ZCTA land area and latitude/longitude
///////////////////////////////////////////////////////////////////

insheet using zip_info/Gaz_zcta_national.txt, tab names clear
keep geoid aland intptlat intptlong
rename geoid zcta_crosswalk
rename aland zip_land_area
rename intptlat zcta_lat
rename intptlong zcta_long
label var zip_land_area "ZCTA land area (square meters)"

keep zcta_crosswalk zip_land_area zcta_lat zcta_long
save zip_info/Gaz_zcta_national.dta, replace


///////////////////////////////////////////////////////////////////
// Import zip to zcta crosswalk
///////////////////////////////////////////////////////////////////
insheet using zip_info/Zip_to_ZCTA_Crosswalk_JSI2014.csv, comma names clear
encode po_name, generate(zip_name)
rename state state_str
gen in_nh1 = (state_str=="NH")
encode state_str, generate(state)
drop state_str po_name

label var state "State"
label var zip_name "Zip post office name"

save zip_info/Zip_to_ZCTA_Crosswalk_JSI2014.dta, replace


///////////////////////////////////////////////////////////////////
// Import Rural/Urban information
///////////////////////////////////////////////////////////////////
import excel "zip_info/2006 Complete Excel RUCA file.xls", firstrow clear

drop ZIPA
rename ZIPN zip

gen rural_class = floor(RUCA20)

// See http://depts.washington.edu/uwruca/ruca-codes.php for codes
label define rural_class 1 "Metro area core" ///
	2 "Metro area high commuting" ///
	3 "Metro area low commuting" ///
	4 "Micropolitan area core" ///
	5 "Micropolitan high commuting" ///
	6 "Micropolitan low commuting" ///
	7 "Small town core" ///
	8 "Small town high commuting" ///
	9 "Small town low commuting" ///
	10 "Rural areas"

label val rural_class rural_class

gen in_nh2 = (STNAME=="New Hampshire")

keep zip rural_class in_nh2
save zip_info/rural_class.dta, replace





///////////////////////////////////////////////////////////////////
// Merge all
///////////////////////////////////////////////////////////////////

use zip_info/ACS_11_5YR_DP03/ACS_11_5YR_DP03_with_ann.dta, clear

merge 1:1 zcta_crosswalk using zip_info/ACS_11_5YR_S1501/ACS_11_5YR_S1501_with_ann.dta
drop _merge

// Merge land area
merge 1:1 zcta_crosswalk using zip_info/Gaz_zcta_national.dta
drop _merge

// Merge with zip to zcta crosswalk
merge 1:m zcta_crosswalk using zip_info/Zip_to_ZCTA_Crosswalk_JSI2014.dta
keep if _merge==3
drop _merge

// Merge with urban rural classification
merge 1:1 zip using zip_info/rural_class.dta
drop _merge

duplicates report zip

// Label New Hampshire zip codes
tab in_nh1 in_nh2, miss
gen in_nh = in_nh1==1 | in_nh2==1
tab in_nh, miss

drop zcta_crosswalk in_nh1 in_nh2
compress
order zip zip_name state zip_pop
save zip_info/Zip_chars.dta, replace



